Generating the samples

# Grid approximation of the posterior for the globe-tossing data (6 water in 9 tosses).
p_grid = seq(from=0 , to=1, length.out=1000)   # candidate values for the proportion p
prior = rep(1, 1000 )                          # flat prior over the grid
likelihood = dbinom(6, size=9, prob=p_grid)    # binomial likelihood at every grid point
posterior = likelihood * prior
posterior = posterior / sum(posterior)         # normalize so the posterior sums to 1
set.seed(100)                                  # make the sampling reproducible
samples = sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

To show that the samples come in a random order, we can display the first 500.

# Trace of the first 500 posterior samples; the lack of visible structure
# shows the draws arrive in random order.
plot(1:500,samples[1:500],
     ylab = "p",
     xlab = "n",
     type = 'l')

3E1 & 3E2 Proportion posterior < .2 & > .8

It is always a good idea to plot a histogram of the posterior, if possible with meaningful axis limits

# Histogram of the posterior samples over the full [0, 1] parameter range.
hist(samples,
     xlim = c(0,1))

Here is a small function, which plots colored histograms and displays the results.

# Plot a histogram of `data` on [0, 1], coloring red the bars that lie below
# `lower` and/or above `upper`; used to visualize tail probabilities of
# posterior samples.
#
# Arguments:
#   data       numeric vector of samples in [0, 1]
#   lower      bars whose left break is below this value are colored red (NULL = none)
#   upper      bars whose break is above this value are colored red (NULL = none)
#   length.out number of break points (so length.out - 1 bins)
#   main       plot title; numeric titles are rounded to 4 digits
colored.hist = function(data, lower = NULL, upper = NULL, length.out = 51, main = "") {
  if (is.numeric(main)) main = round(main, 4)
  # compute the bin counts without plotting (use FALSE, never the reassignable F)
  h = hist(data, breaks = seq(0, 1, length.out = length.out), plot = FALSE)
  cols = rep("white", length(h$breaks))
  if (!is.null(lower)) cols[h$breaks < lower] = "red"
  if (!is.null(upper)) cols[h$breaks > upper] = "red"
  plot(h, col = cols, main = main)
}


# using that R interprets FALSE as 0 and TRUE as 1
p_smaller_0.2 = mean(samples < .2)  # 3E1: posterior probability that p < 0.2
p_larger_0.8 = mean(samples > .8)   # 3E2: posterior probability that p > 0.8
par(mfrow = c(1,2))                 # show the two histograms side by side
colored.hist(samples, lower = .2, main = p_smaller_0.2)
colored.hist(samples, upper = .8, main = p_larger_0.8)

3E3 Proportion posterior > .2 && < .8

# 3E3: posterior mass between 0.2 and 0.8 (elementwise &, not the scalar &&)
mean(samples > .2 & samples < .8)
## [1] 0.888
colored.hist(samples, lower = .2, upper = .8,
             main = mean(samples > .2 & samples < .8))

One cannot simply do mean(.2 < samples < .8) — R does not support chained comparisons, so the two conditions must be combined with &!

3E4 20% of posterior below x, 3E5 20% of posterior above x

Here we need to use the quantile function

# 3E4/3E5: quantile inverts the cumulative distribution of the samples
lowest_20_percent = quantile(samples,.2)   # 20% of the posterior mass lies below this
highest_20_percent = quantile(samples,.8)  # 20% of the posterior mass lies above this

colored.hist(samples,
             lower = lowest_20_percent,
             upper = highest_20_percent,
             length.out = 51)
# annotate the cut points next to the colored tail regions
text(lowest_20_percent,400, round(lowest_20_percent,3), pos = 2)
text(highest_20_percent,425, round(highest_20_percent,3), pos = 4)

3E6 Border values of narrowest 66% interval

This is the highest density posterior interval.

# 3E6: narrowest interval containing 66% of the posterior (rethinking::HPDI)
HPDI(samples,.66)
##     |0.66     0.66| 
## 0.5085085 0.7737738

3E7 Border values of the central 66% interval

This is commonly called the credible interval.

# 3E7: central 66% interval — equal (1 - 0.66)/2 = 17% tails on both sides;
# a single vectorized quantile call returns both borders at once
quantile(samples, probs = c((1-.66)/2, 1-(1-.66)/2))
##       17%       83% 
## 0.5025025 0.7697698

or

# rethinking::PI computes the same central (percentile) interval directly
PI(samples,.66)
##       17%       83% 
## 0.5025025 0.7697698

3M1 Construct posterior for new data with grid approximation

Everything except the data is the same as at the beginning of the exercises

# 3M1: same grid approximation as above, now with 8 water in 15 tosses
likelihood = dbinom(8, size=15, prob=p_grid)
posterior = likelihood * prior
posterior = posterior / sum(posterior)
plot(p_grid,posterior,'l') # use 'l' to get a line and not dots

3M2 10000 samples and 90% HPDI

We use the HPDI function from the rethinking package.

# 3M2: draw 10000 samples from the updated posterior and take the 90% HPDI
samples = sample(p_grid, prob=posterior, size=1e4, replace=TRUE )
HPDI(samples, prob = .9)
##      |0.9      0.9| 
## 0.3293293 0.7167167

3M3 posterior predictive check

A few general tips

# 3M3: simulate one predicted dataset (number of water out of 15 tosses)
# per posterior sample, thereby propagating the parameter uncertainty
posterior_predictions = 
  rbinom(n = length(samples),
         size = 15,
         prob = samples)

par(mfrow = c(2,1))

# tabulate the predicted counts and plot their relative frequencies
posterior_predictions %>% 
  table() %>% 
  prop.table() %>% 
  plot(ylab = "Proportion of samples",
       xlab = "N water")

# Now the same result with hard-to-read code
plot(prop.table(table(posterior_predictions)),ylab = "Proportion of samples", xlab = "N water")

# probability that a simulated dataset reproduces the observed 8 water
mean(posterior_predictions == 8)
## [1] 0.1444

The probability of getting exactly 8 out of 15 is well below 1, but at about 14% it is the single most likely outcome.

3M4 Probability 6 in 9, given previous posterior

We only need to change the size, and can then look at the predictions.

# 3M4: posterior predictions for 9 tosses, still using the posterior
# estimated from the 8/15 data (only `size` changes)
posterior_predictions = 
  rbinom(n = length(samples),
         size = 9,
         prob = samples)

posterior_predictions %>% 
  table() %>% 
  prop.table() %>% 
  plot(ylab = "Proportion of samples",
       xlab = "N water")

# probability of observing 6 water in 9 tosses under this posterior
mean(posterior_predictions == 6)
## [1] 0.1751

3M5 Redo the analyses with a prior that is zero below p = 0.5

Maybe this is a bit advanced, but we will write a little function that does all we need for us instead of doing everything manually:

# Grid-approximate the posterior of a binomial globe-tossing model and return
# posterior samples of the proportion of water.
#
# Arguments:
#   p_grid    grid of candidate proportions
#   prior     prior weight at each grid point (same length as p_grid)
#   n_tosses  number of tosses (binomial size)
#   n_water   observed number of water landings (binomial successes)
#   n_samples number of posterior draws to return (default 1e4, as before)
#   seed      RNG seed for reproducibility (default 100, as before);
#             note that this resets the global RNG state
#
# Returns: a numeric vector of n_samples draws from p_grid, weighted by the posterior.
get_samples = function(p_grid, prior, n_tosses, n_water,
                       n_samples = 1e4, seed = 100) {
  likelihood = dbinom(n_water, size = n_tosses, prob = p_grid)
  posterior = likelihood * prior
  posterior = posterior / sum(posterior)  # normalize to a probability distribution
  set.seed(seed)
  samples = sample(p_grid, prob = posterior, size = n_samples, replace = TRUE)
  return(samples)
}

# Run all 3E/3M summary computations for a given grid and prior and return
# them as one named numeric vector. Names (including the "p_betweeb" typo)
# are kept exactly as-is so the printed results below stay reproducible.
# Depends on get_samples() above and HPDI()/PI() from the rethinking package.
get_my_results = function(p_grid, prior) {
  
  # posterior samples for the original 6/9 data and the new 8/15 data
  samples = get_samples(p_grid, prior, n_tosses = 9, n_water = 6)
  spls_15_8 = get_samples(p_grid, prior, n_tosses = 15, n_water = 8)
  
  results = c(
    p_below_0.2 = mean(samples < .2),                #E1
    p_above_0.8 = mean(samples > .8),                #E2
    p_betweeb_0.2_0.8 = mean(samples > .2 & samples < .8), #E3
    quantile_0.2 = quantile(samples,.2),             #E4
    quantile_0.8 = quantile(samples,.8),             #E5
    HDPI_66.lower = HPDI(samples,.66)[1],            #E6a
    HDPI_66.upper = HPDI(samples,.66)[2],            #E6b
    CI_66.lower = quantile(samples,(1-.66)/2),       #E7a
    CI_66.upper = quantile(samples,1-(1-.66)/2),     #E7b
    HDPI_90.lower = HPDI(spls_15_8,.90)[1],          #M2a
    HDPI_90.upper = HPDI(spls_15_8,.90)[2],          #M2b
    post_pred_8_15 = mean(rbinom(n = 10e4,size = 15,prob = spls_15_8) == 8), #M3
    post_pred_6_9 = mean(rbinom(n = 10e4,size = 9,prob = spls_15_8) == 6)   #M4
  )
  return(results)
}

# all summaries for the flat prior (matches the individual answers above)
get_my_results(p_grid = p_grid,
               prior = prior)
##         p_below_0.2         p_above_0.8   p_betweeb_0.2_0.8    quantile_0.2.20% 
##           0.0004000           0.1116000           0.8880000           0.5185185 
##    quantile_0.8.80% HDPI_66.lower.|0.66 HDPI_66.upper.0.66|     CI_66.lower.17% 
##           0.7557558           0.5085085           0.7737738           0.5025025 
##     CI_66.upper.83%  HDPI_90.lower.|0.9  HDPI_90.upper.0.9|      post_pred_8_15 
##           0.7697698           0.3343343           0.7217217           0.1478100 
##       post_pred_6_9 
##           0.1757100
# 3M5: step prior — zero below p = 0.5, flat above
prior_flat = rep(1,1000)
prior_05 = prior_flat
prior_05[p_grid < 0.5] = 0

# compare all summaries for the two priors side by side
results = 
  cbind(prior_flat = get_my_results(p_grid, prior = prior_flat),
        prior_0.5 = get_my_results(p_grid, prior = prior_05))

results
##                     prior_flat prior_0.5
## p_below_0.2          0.0004000 0.0000000
## p_above_0.8          0.1116000 0.1368000
## p_betweeb_0.2_0.8    0.8880000 0.8632000
## quantile_0.2.20%     0.5185185 0.5815816
## quantile_0.8.80%     0.7557558 0.7729730
## HDPI_66.lower.|0.66  0.5085085 0.5385385
## HDPI_66.upper.0.66|  0.7737738 0.7507508
## CI_66.lower.17%      0.5025025 0.5715716
## CI_66.upper.83%      0.7697698 0.7857858
## HDPI_90.lower.|0.9   0.3343343 0.5005005
## HDPI_90.upper.0.9|   0.7217217 0.7097097
## post_pred_8_15       0.1478100 0.1583700
## post_pred_6_9        0.1757100 0.2327600

To understand the differences, we can look at the entire posterior distributions.

Here are the posterior predictions for posteriors from different priors, and the predictions when using the true proportion of water.

Posterior predictions

For instance, our best guess for the probability of water with the step-shaped prior and 8 out of 15 water landings is 0.6052395. Now we can simulate what happens if we plug this parameter in and make 10 predictions:

# 10 posterior-predictive draws using only the posterior mean as a single
# plug-in parameter value (ignoring parameter uncertainty).
# NOTE(review): post_0.5 is not defined in the code shown here — presumably
# the posterior samples for the step prior from a hidden chunk; confirm
# before running this in isolation.
post_preds = 
  rbinom(n = 10,
       size = 15,
       prob = mean(post_0.5))
post_preds
##  [1]  7  7 10  8  9 11  9  9  6  6

What is the mean of these predictions, divided by the number of tosses?

# average predicted proportion of water over the 10 simulated datasets
mean(post_preds)/15
## [1] 0.5466667

What if we make not 10 but 1000 predictions:

# with 1000 simulated datasets, the average converges to the plug-in proportion
post_preds = 
  rbinom(n = 1000,
       size = 15,
       prob = mean(post_0.5)) 
mean(post_preds)/15
## [1] 0.6036667

There we go. Averaged over many predictions, we get the right proportion, even if there is some variation in the predictions from a single parameter value, as can be seen in the next figure.

3M6 Credible Interval with width of .05

There is no unique solution for this question, because for proportions the standard error is a function of the proportion itself:

\[ se_{prop} = \sqrt{\frac{p(1-p)}{n}} \] We can illustrate this with an arbitrary sample size.

# Standard error of a sample proportion: se = sqrt(p * (1 - p) / n).
#
# Arguments:
#   p proportion (vectorized, so it can be used with curve())
#   n sample size; defaults to 50, the arbitrary size used for this
#     illustration, so existing one-argument calls are unchanged
se_prop = function(p, n = 50) {
  sqrt((p * (1 - p)) / n)
}

# the standard error peaks at p = 0.5 and vanishes at the boundaries
curve(se_prop(x),
      xlab = "proportion",
      ylab = "standard error")

For this exercise we assume that the target proportion is 0.7.

First we write a function that takes sample size n and proportion p as input and returns the width of the 99% CI.

# Width of the central 99% posterior interval for a proportion p estimated
# from n tosses, using grid approximation with a flat prior.
#
# Bug fix: the original multiplied by the GLOBAL `prior` (length 1000),
# which R silently recycled against this length-10000 grid. That only
# happened to give correct results because the global prior was flat.
# The flat prior is now defined locally to match the grid length.
#
# Arguments:
#   p true proportion of water used to generate the (rounded) data
#   n sample size (number of tosses)
# Returns: width of the 99% percentile interval (via rethinking::PI).
interval_width = function(p,n) {
  p_grid = seq(0,1,length.out = 10000)
  prior = rep(1, length(p_grid))  # flat prior with the same length as the grid
  likelihood = dbinom(round(n*p), size = n,  prob=p_grid)
  posterior = likelihood * prior
  posterior = posterior / sum(posterior)
  set.seed(123)  # fixed seed so the grid search below is reproducible
  samples = sample(p_grid, prob=posterior, size=1e5, replace=TRUE)
  width = diff(PI(samples,prob = .99))
  return(width)
}

Next we try a coarse grid-search to find where roughly the correct sample size will be.

# coarse grid search over candidate sample sizes from 100 to 5000
ns = seq(100,5000,250)
i.width = 
  do.call(c,
          lapply(ns, function(n) interval_width(.7,n))
  )


# the interval width shrinks with n; the horizontal line marks the
# target width of 0.05
plot(ns,i.width,'l',
      ylab = "interval width",
      xlab = "sample size")
 abline(h = .05)

Now that we know it is around 2000, we can search with a finer grid around this area.

# finer search between 1500 and 3000, where the coarse search crossed 0.05
ns = seq(1500,3000,25)

i.width = 
  do.call(c,
        lapply(ns, function(n) interval_width(.7,n))
        )

# smallest n whose 99% interval is narrower than 0.05
minimum_sample_size = min(ns[i.width < .05])
minimum_sample_size
## [1] 2225

Here is a plot of the grid-search result. We also add plots for proportions of .6 and .8, just to show how this influences the width of the CI.

Questions

Clarification

  • “Please concisely delineate the difference, advantages, disadvantages, and cases of use for grid approximation, quadratic approximation, and MCMC.”
  • Question: what do they mean by ‘posterior distribution constructed from the NEW (8/15) data’? Is it also the case that we take w above (the number of Water in 15 tosses — 10,000 observations) and build a posterior from it? We think we’re probably wrong there, but it would be great to have a clear explanation of what they mean by the NEW (8/15) data.
  • Does HPDI always include the most frequent value of the posterior probability? (Ref. page 57)
  • Ref. 3M5: Why are there so many simulated observations below the 0.5 proportion threshold (in this case 7.5), when the simulated data is based on a posterior with no values below p=0.5
  • Is MAP the same as the mode? (Ref. page 59)
  • The equation on page 62: What does “!” mean? Would be helpful with a walk-through of the elements of the equation.
  • On page 61, it is suggested that simulated data can be used to conduct power analyses. We thought this could be relevant for 3M6, but did not see how. Is this something we will come back to later in the course?
  • Why do we use this input for the flat prior? prob_p <- rep(1, 1000)

Discussion

  • “An increasing number of researchers is implementing Bayesian methods, which inevitably introduces an influx of people that are inexperienced in Bayesian methods. It becomes more likely that many will implement the methods in a way that is not really sound as they do not fully grasp the underlying assumptions. Could this lead to a ‘Bayesian backlash’?“